clear all;
close all;

%  =======================================================================================================================================================
% 
%  Code Description: 
%  This codefile generates empirical summary statistics as provided in sections 2 and 3 and in internet appendix D. 
% 
%  =======================================================================================================================================================
% 
%  Major output:
%  - Figure 1: Growth of cross-fund holdings across major fund domiciles around the world. 
%  - Figure 2: Cross-fund holdings in Germany over time.
%  - Table 1: Summary statistics across all fund-month observations.
%  - Figure IA.5: Aggregate total assets under management (in EUR trillion) of the German fund sector, broken down by fund category and retail/institutional.
%
%  =======================================================================================================================================================
% 
%  General disclaimer:
%  This file directory produces replication code for "Connected Funds". 
%  Because we cannot share the underlying data provided by the Bundesbank's Research Data and Service Centre (RDSC) and other subscription data sources, 
%  we have included pseudo data to show how the raw data are formatted. 
%  Other researchers can go through a similar approval and subscription process to obtain the underlying data. (2023-04-06)
% 
%  =======================================================================================================================================================

projectpath = 'C:\ConnectedFunds_Codebase\'

% Add functions folders
addpath(strcat(projectpath, 'Code\matlab_functions'))


%% Figure 1: Growth of cross-fund holdings across major fund domiciles around the world. 

load(strcat(projectpath, 'Data\ECB_SDW\ECB_IF_shares.mat'))

all_data                                    = outerjoin(ECB_data, US_data, 'keys', 'DATUM');
all_data(isnan(all_data.US_TA),:)           = []; % show yearly values only
all_data(all_data.DATUM_ECB_data<200900,:)  = [];
dates                                       = floor(all_data.DATUM_ECB_data./100); 

close
y = 100 * [all_data.All_IF ./ all_data.All_TA all_data.DE_IF ./ all_data.DE_TA all_data.IE_IF ./ all_data.IE_TA all_data.LU_IF ./ all_data.LU_TA all_data.US_IF ./ (all_data.US_TA + all_data.US_IF)];  
plot(dates,y(:,1),'k-o',dates,y(:,2),'b-s',dates,y(:,3),'k:',dates,y(:,4),'k--',dates,y(:,5),'r-*');
xlabel('Year'); ylabel('Cross-fund holdings (in percent of aggregate TNA)'); 
legend('Euro area total','Germany','Ireland','Luxembourg','U.S.*','Location','NorthWest');
axis tight; xtickangle(45); ylim([0 25]); grid on; 
saveas(gcf, strcat(projectpath, 'Paper\Figures\Fig1_Growth of cross-fund holdings across major fund domiciles.png'),'png');


%%  ================================================================================ Preparations ================================================================================


clear all;
close all;

projectpath = 'C:\ConnectedFunds_Codebase\'

% Load fund-level data
load(strcat(projectpath, 'Data\IFS\IFS_All_CleanData.mat'), 'IFS_All')

% Filter fund-level data
IFS_All(IFS_All.DATUM<200909,:)         = [];
IFS_All(IFS_All.DATUM>202006,:)         = [];
umonth                                  = unique(IFS_All.DATUM);
Tmonth                                  = length(umonth);
IFS_All(IFS_All.etf == 1,:)             = [];
IFS_All(IFS_All.FONDSVERM <= 0, :)      = [];
IFS_All(IFS_All.bilanzsumme <= 0, :)    = [];


%%  ======== Figure IA.5: Aggregate total assets under management (in EUR trillion) of the German fund sector, broken down by fund category and retail/institutional flag. ==========


% Loop over time and generate aggregate fund assets, by fund type and asset class
for t = 1:length(umonth)

    TotalAssets_Publikum(t, 1)              = nansum(IFS_All.bilanzsumme(IFS_All.SPEZIAL == 0 & IFS_All.DATUM==umonth(t)));
    TotalAssets_Spezial(t, 1)               = nansum(IFS_All.bilanzsumme(IFS_All.SPEZIAL == 1 & IFS_All.DATUM==umonth(t)));

    for a = [1 2 3 7]
        TotalAssets_ByFundType_Publikum(t, a)   = nansum(IFS_All.bilanzsumme(IFS_All.artmittel==a & IFS_All.SPEZIAL == 0 & IFS_All.DATUM==umonth(t)));
        TotalAssets_ByFundType_Spezial(t, a)    = nansum(IFS_All.bilanzsumme(IFS_All.artmittel==a & IFS_All.SPEZIAL == 1 & IFS_All.DATUM==umonth(t)));
    end

end

dates = datenum(num2str(umonth), 'YYYYmm');

SectorSize = [TotalAssets_ByFundType_Publikum(:, 1) TotalAssets_ByFundType_Spezial(:, 1) ...
              TotalAssets_ByFundType_Publikum(:, 2) TotalAssets_ByFundType_Spezial(:, 2) ...  
              TotalAssets_ByFundType_Publikum(:, 3) TotalAssets_ByFundType_Spezial(:, 3) ...
              TotalAssets_ByFundType_Publikum(:, 7) TotalAssets_ByFundType_Spezial(:, 7)];

SectorSize_Other = [TotalAssets_Publikum - sum(TotalAssets_ByFundType_Publikum')' ...
                    TotalAssets_Spezial - sum(TotalAssets_ByFundType_Spezial')'];

SectorSize_Other = sum(SectorSize_Other')';                
                
close;
h= area(dates, (10^-9) * [SectorSize  SectorSize_Other]); datetick('x', 'YYYY');
h(1).FaceColor = 'blue';
h(1).FaceAlpha = 0.3;
h(2).FaceColor = 'blue';
h(3).FaceColor = 'red';
h(3).FaceAlpha = 0.3;
h(4).FaceColor = 'red';
h(5).FaceColor = 'green';
h(5).FaceAlpha = 0.3;
h(6).FaceColor = 'green';
h(7).FaceColor = 'cyan';
h(7).FaceAlpha = 0.3;
h(8).FaceColor = 'cyan';
h(9).FaceColor = 'black';
h(9).FaceAlpha = 0.3;
xlabel('Year'); ylabel('Assets under management (� trillion )'); 
leg = legend('Equity-Retail','Equity-Inst.',...
        'Bond-Retail','Bond-Inst.',...
        'Mixed-Retail','Mixed-Inst.',...
        'FoF-Retail','FoF-Inst.',...
        'Other','Location','BestOutside');
title(leg,'Fund type:');    
grid on; axis square; axis tight; xtickangle(45); ylim([0 2.5]); 
saveas(gcf,strcat(projectpath, 'Paper\Figures\FigIA5_Aggregate total assets under management of the German fund sector.png'),'png');   


%%  ============================ Table 1: Summary statistics across all fund-month observations. (Step A: generate dataset) ============================


% Kick-out funds other than equity, bond, mixed securities fund or fund-of-funds 
IFS_All(IFS_All.artmittel == 5 | IFS_All.artmittel == 6 | IFS_All.artmittel > 7, :) = [];
N_artmittel = max(IFS_All.artmittel);

% Generate ISIN lists of MMFs and ETFs (Based on Morningstar data that we cannot share)
data = readtable(strcat(projectpath, 'Data\MS\MMF_Global_FundData.xlsx'));
ummf = unique([data.ISIN; IFS_All.ISIN(IFS_All.artmittel==6)]);
clear data;

data = readtable(strcat(projectpath, 'Data\MS\ETF_Global_FundData.xlsx'));
uetf = unique([data.ISIN; IFS_All.ISIN(IFS_All.etf==1)]);
clear data;
    
% Set-up result variables
TotalFundHoldingsMMF_Publikum           = zeros(Tmonth,1);
TotalFundHoldingsMMF_Spezial            = zeros(Tmonth,1);

TotalFundHoldingsETF_Publikum           = zeros(Tmonth,1);
TotalFundHoldingsETF_Spezial            = zeros(Tmonth,1);

TotalFundHoldingsDE_Publikum            = zeros(Tmonth,1);
TotalFundHoldingsDE_Spezial             = zeros(Tmonth,1);

TotalFundHoldings_ByFundType_Publikum   = zeros(Tmonth,7);
TotalFundHoldings_ByFundType_Spezial    = zeros(Tmonth,7);

TotalAssets_ByFundType_Publikum         = zeros(Tmonth,7);
TotalAssets_ByFundType_Spezial          = zeros(Tmonth,7);

TotalFundHoldings_Publikum              = zeros(Tmonth,1);
TotalFundHoldings_Spezial               = zeros(Tmonth,1);

TotalAssets_Publikum                    = zeros(Tmonth,1);
TotalAssets_Spezial                     = zeros(Tmonth,1);

TotalCash_Publikum                      = zeros(Tmonth,1);
TotalCash_Spezial                       = zeros(Tmonth,1);

TotalCash_ByFundType_Publikum           = zeros(Tmonth,7);
TotalCash_ByFundType_Spezial            = zeros(Tmonth,7);

% Loop over time and generate fund-month panel
for t = Tmonth : -1 : 1

    t,
    
    close;
    
    fund_data = IFS_All(IFS_All.DATUM==umonth(t) & IFS_All.bilanzsumme>1 & IFS_All.FONDSVERM>1,:);
    fund_data = sortrows(fund_data,'ISIN','ascend');
    
    TotalAssets_Publikum(t) = nansum(fund_data.bilanzsumme(fund_data.SPEZIAL==0));
    TotalAssets_Spezial(t)  = nansum(fund_data.bilanzsumme(fund_data.SPEZIAL==1));

    for artmittel=1 : N_artmittel
        TotalAssets_ByFundType_Publikum(t,artmittel) = nansum(fund_data.bilanzsumme(fund_data.artmittel==artmittel & fund_data.SPEZIAL==0));
        TotalAssets_ByFundType_Spezial(t,artmittel)  = nansum(fund_data.bilanzsumme(fund_data.artmittel==artmittel & fund_data.SPEZIAL==1));
    end
    
    TotalCash_Publikum(t) = nansum(fund_data.BANKG(fund_data.SPEZIAL==0));
    TotalCash_Spezial(t)  = nansum(fund_data.BANKG(fund_data.SPEZIAL==1));
    
    for artmittel=1 : N_artmittel
        TotalCash_ByFundType_Publikum(t,artmittel) = nansum(fund_data.BANKG(fund_data.artmittel==artmittel & fund_data.SPEZIAL==0));
        TotalCash_ByFundType_Spezial(t,artmittel)  = nansum(fund_data.BANKG(fund_data.artmittel==artmittel & fund_data.SPEZIAL==1));
    end
    
    % Load funds' portfolio data
    load(strcat(projectpath, 'Data\IFS\mat\IFS_Holdings_',num2str(umonth(t)),'.mat'));
    
    % Add security-level information from csdb data
    load(strcat(projectpath, 'Data\CSDB\mat\CSDB_',num2str(umonth(t)),'.mat'))

    % Set asset type (equity, bond, fund share)
    uasset  = unique([IFS_Holdings.ISIN; IFS_Holdings.SECCODE; fund_data.ISIN]);
    [~,idx] = ismember(uasset,CSDB.ISIN);
    
    if umonth(t) > 201301
        AssetType = cell(length(uasset),1);
        AssetType(idx>0) =  cellstr(CSDB.ESA_INS_2010(idx(idx>0)));

        AssetType_num = zeros(length(uasset),1);
        AssetType_num(idx>0) =  1 * startsWith(AssetType(idx>0),"F_3") + ... % bond
                                2 * startsWith(AssetType(idx>0),"F_51") + ... % equity
                                3 * startsWith(AssetType(idx>0),"F_52");    % Fund share
    else
        AssetType = cell(length(uasset),1);
        AssetType(idx>0) =  cellstr(CSDB.ESA_INS_1995(idx(idx>0)));

        AssetType_num = zeros(length(uasset),1);
        AssetType_num(idx>0) =  1 * startsWith(AssetType(idx>0), "F.3") + ... % bond
                                2 * startsWith(AssetType(idx>0), "F.51") + ... % equity
                                3 * startsWith(AssetType(idx>0), "F.52");    % Fund share                                
    end
    clear j AssetType;
    
    ufund                = fund_data.ISIN;
    [~,idx]              = ismember(uasset,ufund);
    AssetType_num(idx>0) = 3;
    
    clear idx* CSDB;
        
    [~,idx]                       = ismember(IFS_Holdings.SECCODE,uasset);
    IFS_Holdings.AssetType        = zeros(height(IFS_Holdings),1);
    IFS_Holdings.AssetType(idx>0) = AssetType_num(idx(idx>0));
    
    % Keep only fund shares
    [~,idx]             = ismember(fund_data.ISIN,ufund);
    fund_data(idx==0,:) = [];
    
    [~,idx]                 = ismember(IFS_Holdings.ISIN,ufund);
    IFS_Holdings(idx==0,:)  = [];
    
    clear idx;
    
    IFS_Holdings(IFS_Holdings.AssetType < 3,:) = [];
    
    ufund    = fund_data.ISIN;  
    uasset   = unique(IFS_Holdings.SECCODE);
    [~,idx1] = ismember(IFS_Holdings.ISIN,ufund);
    [~,idx2] = ismember(IFS_Holdings.SECCODE,ufund);
    [~,idx3] = ismember(IFS_Holdings.SECCODE,uasset);
    [~,idx4] = ismember(IFS_Holdings.SECCODE,ummf);
    [~,idx5] = ismember(IFS_Holdings.SECCODE,uetf);
    
    Network     = accumarray([idx1 idx3],IFS_Holdings.AMOUNT,[length(ufund) length(uasset)]);
    Network_DE  = accumarray([idx1(idx2>0) idx2(idx2>0)],IFS_Holdings.AMOUNT(idx2>0),[length(ufund) length(ufund)]);
    Network_MMF = accumarray([idx1(idx4>0) idx4(idx4>0)],IFS_Holdings.AMOUNT(idx4>0),[length(ufund) length(ummf)]);
    Network_ETF = accumarray([idx1(idx5>0) idx5(idx5>0)],IFS_Holdings.AMOUNT(idx5>0),[length(ufund) length(uetf)]);
      
    % Add fund holdings to ifs data
    fund_data.Fund_Holdings    = sum(Network')';
    fund_data.Fund_Holdings_DE = sum(Network_DE')';
    
    % Generate aggregate holdings
    TotalFundHoldings_Publikum(t) = sum(sum(Network(fund_data.SPEZIAL==0,:)));
    TotalFundHoldings_Spezial(t)  = sum(sum(Network(fund_data.SPEZIAL==1,:)));
    
    for artmittel = [1 2 3 7]
       TotalFundHoldings_ByFundType_Publikum(t,artmittel) = sum(sum(Network(fund_data.SPEZIAL==0 & fund_data.artmittel == artmittel,:)));
       TotalFundHoldings_ByFundType_Spezial(t,artmittel)  = sum(sum(Network(fund_data.SPEZIAL==1 & fund_data.artmittel == artmittel,:)));
    end
    
    TotalFundHoldingsDE_Publikum(t) = sum(sum(Network_DE(fund_data.SPEZIAL==0,:)));
    TotalFundHoldingsDE_Spezial(t)  = sum(sum(Network_DE(fund_data.SPEZIAL==1,:)));
    
    TotalFundHoldingsMMF_Publikum(t) = sum(sum(Network_MMF(fund_data.SPEZIAL==0,:)));
    TotalFundHoldingsMMF_Spezial(t)  = sum(sum(Network_MMF(fund_data.SPEZIAL==1,:)));
    
    TotalFundHoldingsETF_Publikum(t) = sum(sum(Network_ETF(fund_data.SPEZIAL==0,:)));
    TotalFundHoldingsETF_Spezial(t)  = sum(sum(Network_ETF(fund_data.SPEZIAL==1,:))); 

    if t == Tmonth
       IFS_All_WithHoldings = fund_data;
    else
       IFS_All_WithHoldings = [IFS_All_WithHoldings; fund_data];
    end

    clear Network* a* fund_data date idx* IFS_Holdings i* j tmp uasset ufund;

end

clear IFS_All t


%%  ============================ Table 1: Summary statistics across all fund-month observations. (Step B: Build summary figures for Table 1) ============================


length(unique(IFS_All_WithHoldings.ISIN))

[length(unique(IFS_All_WithHoldings.ISIN(IFS_All_WithHoldings.artmittel == 1))) ...
length(unique(IFS_All_WithHoldings.ISIN(IFS_All_WithHoldings.artmittel == 2))) ...
length(unique(IFS_All_WithHoldings.ISIN(IFS_All_WithHoldings.artmittel == 3))) ...
length(unique(IFS_All_WithHoldings.ISIN(IFS_All_WithHoldings.artmittel == 7)))]

[length(unique(IFS_All_WithHoldings.ISIN(IFS_All_WithHoldings.SPEZIAL == 0))) ...
 length(unique(IFS_All_WithHoldings.ISIN(IFS_All_WithHoldings.SPEZIAL == 1)))]   

%  Build summary figures for Table 1: TNA, Return, Netflows, Cash holdings and fund holdings, by fund type 
X = [];
for i = 1:18
    if i == 1
        x  = (10^-6)*IFS_All_WithHoldings.FONDSVERM;
    elseif i == 2
        x  = 100*IFS_All_WithHoldings.ReturnDiv;
    elseif i == 3
        x  = 100*IFS_All_WithHoldings.Netflows; 
    elseif i == 4
        x  = 100*IFS_All_WithHoldings.BANKG./IFS_All_WithHoldings.FONDSVERM;
    elseif i == 5
        x  = 100*IFS_All_WithHoldings.Fund_Holdings./IFS_All_WithHoldings.FONDSVERM;
    elseif i == 6
        x  = 100*IFS_All_WithHoldings.Fund_Holdings_DE./IFS_All_WithHoldings.FONDSVERM;
    
        % spezial    
    elseif i == 7
        x  = (10^-6)*IFS_All_WithHoldings.FONDSVERM(IFS_All_WithHoldings.SPEZIAL == 1);
    elseif i == 8
        x  = 100*IFS_All_WithHoldings.ReturnDiv(IFS_All_WithHoldings.SPEZIAL == 1);
    elseif i == 9
        x  = 100*IFS_All_WithHoldings.Netflows(IFS_All_WithHoldings.SPEZIAL == 1); 
    elseif i == 10
        x  = 100*IFS_All_WithHoldings.BANKG(IFS_All_WithHoldings.SPEZIAL == 1)./IFS_All_WithHoldings.FONDSVERM(IFS_All_WithHoldings.SPEZIAL == 1);  
    elseif i == 11
        x  = 100*IFS_All_WithHoldings.Fund_Holdings(IFS_All_WithHoldings.SPEZIAL == 1)./IFS_All_WithHoldings.FONDSVERM(IFS_All_WithHoldings.SPEZIAL == 1); 
    elseif i == 12
        x  = 100*IFS_All_WithHoldings.Fund_Holdings_DE(IFS_All_WithHoldings.SPEZIAL == 1)./IFS_All_WithHoldings.FONDSVERM(IFS_All_WithHoldings.SPEZIAL == 1); 
        
        % publikum    
    elseif i == 13
        x  = (10^-6)*IFS_All_WithHoldings.FONDSVERM(IFS_All_WithHoldings.SPEZIAL == 0);
    elseif i == 14
        x  = 100*IFS_All_WithHoldings.ReturnDiv(IFS_All_WithHoldings.SPEZIAL == 0);
    elseif i == 15
        x  = 100*IFS_All_WithHoldings.Netflows(IFS_All_WithHoldings.SPEZIAL == 0); 
    elseif i == 16
        x  = 100*IFS_All_WithHoldings.BANKG(IFS_All_WithHoldings.SPEZIAL == 0)./IFS_All_WithHoldings.FONDSVERM(IFS_All_WithHoldings.SPEZIAL == 0);    
    elseif i == 17
        x  = 100*IFS_All_WithHoldings.Fund_Holdings(IFS_All_WithHoldings.SPEZIAL == 0)./IFS_All_WithHoldings.FONDSVERM(IFS_All_WithHoldings.SPEZIAL == 0);    
    elseif i == 18
        x  = 100*IFS_All_WithHoldings.Fund_Holdings_DE(IFS_All_WithHoldings.SPEZIAL == 0)./IFS_All_WithHoldings.FONDSVERM(IFS_All_WithHoldings.SPEZIAL == 0);    
    end
    
    % Winsorize data at the 1% percentile
    ql = quantile(x,0.01); qh = quantile(x,0.99);
    x(x<ql) = ql;
    x(x>qh) = qh;
    
    X(i,:) = [nanmean(x) nanstd(x) quantile(x,0.25) quantile(x,0.5) quantile(x,0.75)];
        
    clear x ql qh
end

%  Save table 1
writematrix(X, strcat(projectpath, 'Paper\Tables\Tab1_Summary statistics across all fund-month observations.xlsx'), 'Sheet', 1, 'Range', 'D3:H20')


%%  ======================================================== Figure 2: Cross-fund holdings in Germany over time. ========================================================

% Compute ratio of fund share holdings to funds' total holdings ...

... for all funds
y  = (TotalFundHoldings_Publikum+TotalFundHoldings_Spezial)./(TotalAssets_Publikum+TotalAssets_Spezial); 
y2 = (TotalFundHoldingsDE_Publikum+TotalFundHoldingsDE_Spezial)./(TotalAssets_Publikum+TotalAssets_Spezial); 
y3 = (TotalFundHoldingsMMF_Publikum+TotalFundHoldingsMMF_Spezial)./(TotalAssets_Publikum+TotalAssets_Spezial); 
y4 = (TotalFundHoldingsETF_Publikum+TotalFundHoldingsETF_Spezial)./(TotalAssets_Publikum+TotalAssets_Spezial); 

close all;
plot(dates, 100 * y, 'k-', dates, 100 * y2, 'b-', dates, 100 * y3, 'r-', dates, 100 * y4, 'g-');
axis square; grid on;
datetick('x','yyyy'); axis([min(dates) max(dates) 0 25]); xtickangle(45)
xlabel('Year'); ylabel('Aggregate share of fund holdings (in percent of TNA)'); 
legend('Total','of which: German','of which: MMFs','of which: ETFs','Location','NorthWest');
saveas(gcf, strcat(projectpath, 'Paper\Figures\Fig2_Cross-fund holdings in Germany over time.png'),'png');

... for retail funds
y  = (TotalFundHoldings_Publikum)./(TotalAssets_Publikum); 
y2 = (TotalFundHoldingsDE_Publikum)./(TotalAssets_Publikum); 
y3 = (TotalFundHoldingsMMF_Publikum)./(TotalAssets_Publikum); 
y4 = (TotalFundHoldingsETF_Publikum)./(TotalAssets_Publikum); 

close all;
plot(dates, 100 * y, 'k-', dates, 100 * y3, 'r-', dates, 100 * y4, 'g-');
axis square; grid on;
datetick('x','yyyy'); axis([min(dates) max(dates) 0 25]); xtickangle(45)
xlabel('Year'); ylabel('Aggregate share of fund holdings (in percent of TNA)'); title('Retail');
legend('Total','of which: MMFs','of which: ETFs','Location','NorthWest');
saveas(gcf, strcat(projectpath, 'Paper\Figures\Fig2L_Cross-fund holdings in Germany over time.png'),'png');

... for institutional funds
y  = (TotalFundHoldings_Spezial)./(TotalAssets_Spezial); 
y2 = (TotalFundHoldingsDE_Spezial)./(TotalAssets_Spezial); 
y3 = (TotalFundHoldingsMMF_Spezial)./(TotalAssets_Spezial); 
y4 = (TotalFundHoldingsETF_Spezial)./(TotalAssets_Spezial); 

close all;
plot(dates, 100 * y, 'k-', dates, 100 * y3, 'r-', dates, 100 * y4, 'g-');
axis square; grid on;
datetick('x','yyyy'); axis([min(dates) max(dates) 0 25]); xtickangle(45)
xlabel('Year'); ylabel('Aggregate share of fund holdings (in percent of TNA)'); title('Institutional');
legend('Total','of which: MMFs','of which: ETFs','Location','NorthWest');
saveas(gcf, strcat(projectpath, 'Paper\Figures\Fig2R_Cross-fund holdings in Germany over time.png'),'png');